Take-home Exercise 2

Spatial Point Patterns Analysis of Airbnb Listing in Singapore.

Yu Yiling https://www.linkedin.com/in/yiling-yu/
09-25-2021
pre code, pre, code {
  white-space: pre !important;
  overflow-x: scroll !important;
  word-break: keep-all !important;
  word-wrap: initial !important;
}

Take-home Exercise 2 requirements

Things learned from Take-home Exercise 1

Note!

Data used

1. Install packages

2. Import and transform data

MRT_sf <- st_read(dsn = "data/LTA_TrainStation_Aug2021", layer="MRTLRTStnPtt")
Reading layer `MRTLRTStnPtt' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\LTA_TrainStation_Aug2021' 
  using driver `ESRI Shapefile'
Simple feature collection with 171 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 6138.311 ymin: 27555.06 xmax: 45254.86 ymax: 47854.2
Projected CRS: SVY21
sg_sf <- st_read(dsn = "data/sg", layer="CostalOutline")
Reading layer `CostalOutline' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\sg' 
  using driver `ESRI Shapefile'
Simple feature collection with 60 features and 4 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 2663.926 ymin: 16357.98 xmax: 56047.79 ymax: 50244.03
Projected CRS: SVY21
bus_sf <- st_read(dsn = "data/BusStopLocation", layer="BusStop")
Reading layer `BusStop' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\BusStopLocation' 
  using driver `ESRI Shapefile'
Simple feature collection with 5156 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 4427.938 ymin: 26482.1 xmax: 48282.5 ymax: 52983.82
Projected CRS: SVY21
listings21 <- read_csv("data/Airbnb_listings/29062021.csv")
listings19 <- read_csv("data/Airbnb_listings/30062019.csv")

hotels <- read_csv("data/OneMap_Data/hotels.csv")
tourism <- read_csv("data/OneMap_Data/tourism.csv")
hawker_sf <- st_read("data/hawker_centres/hawker-centres-geojson.geojson") %>%
  st_transform(crs = 3414)
Reading layer `hawker-centres-geojson' from data source 
  `C:\yiling-yu\IS415_Blog\_posts\2021-09-25-take-home-exercise-2\data\hawker_centres\hawker-centres-geojson.geojson' 
  using driver `GeoJSON'
Simple feature collection with 125 features and 2 fields
Geometry type: POINT
Dimension:     XYZ
Bounding box:  xmin: 103.6974 ymin: 1.272716 xmax: 103.9882 ymax: 1.449217
z_range:       zmin: 0 zmax: 0
Geodetic CRS:  WGS 84
listings21_sf <- st_as_sf(listings21, 
                       coords = c("longitude", "latitude"),
                       crs=4326) %>%
  st_transform(crs = 3414)

listings19_sf <- st_as_sf(listings19, 
                       coords = c("longitude", "latitude"),
                       crs=4326) %>%
  st_transform(crs = 3414)

hotels_sf <- st_as_sf(hotels, 
                       coords = c("Lng", "Lat"),
                       crs=4326) %>%
  st_transform(crs = 3414)

tourism_sf <- st_as_sf(tourism, 
                       coords = c("Lng", "Lat"),
                       crs=4326) %>%
  st_transform(crs = 3414)

MRT_sf <- st_set_crs(MRT_sf, 3414)
sg_sf <- st_set_crs(sg_sf, 3414)
bus_sf <- st_set_crs(bus_sf, 3414)

Have a glance at the point maps

tmap_mode("view")
tmap_options(check.and.fix = TRUE)

tm_shape(sg_sf) +
  tm_polygons() +
tm_shape(listings19_sf) +
  tm_dots(alpha=0.4,
          col="blue",
          size=0.05) +
tm_shape(listings21_sf) +
  tm_dots(alpha=0.4,
          col="red",
          size=0.05)
tmap_mode("plot")

3. Geospatial data wrangling

a. Converting sf data frames to Spatial class

MRT <- as_Spatial(MRT_sf)
listings21 <- as_Spatial(listings21_sf)
listings19 <- as_Spatial(listings19_sf)
hotels <- as_Spatial(hotels_sf)
tourism <- as_Spatial(tourism_sf)
sg <- as_Spatial(st_zm(sg_sf))
hawker <- as_Spatial(hawker_sf)
bus <- as_Spatial(bus_sf)

b. Converting the Spatial class into generic sp object

MRT_sp <- as(MRT, "SpatialPoints")
listings21_sp <- as(listings21, "SpatialPoints")
listings19_sp <- as(listings19, "SpatialPoints")
hotels_sp <- as(hotels, "SpatialPoints")
tourism_sp <- as(tourism, "SpatialPoints")
sg_sp <- as(sg, "SpatialPolygons")
hawker_sp <- as(hawker, "SpatialPoints")
bus_sp <- as(bus, "SpatialPoints")

c. Converting the generic sp object into spatstat’s ppp object

MRT_ppp <- as(MRT_sp, "ppp")
listings21_ppp <- as(listings21_sp, "ppp")
listings19_ppp <- as(listings19_sp, "ppp")
hotels_ppp <- as(hotels_sp, "ppp")
tourism_ppp <- as(tourism_sp, "ppp")
hawker_ppp <- as(hawker_sp, "ppp")
bus_ppp <- as(bus_sp, "ppp")
summary(MRT_ppp)
Planar point pattern:  171 points
Average intensity 2.153565e-07 points per square unit

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [6138.31, 45254.86] x [27555.06, 47854.2] units
                    (39120 x 20300 units)
Window area = 794032000 square units
summary(listings21_ppp)
Planar point pattern:  4238 points
Average intensity 5.114513e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [7406.99, 43337.89] x [25330, 48391.55] units
                    (35930 x 23060 units)
Window area = 828622000 square units
summary(listings19_ppp)
Planar point pattern:  8293 points
Average intensity 9.345289e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [7215.57, 44098.31] x [25166.35, 49226.35] units
                    (36880 x 24060 units)
Window area = 887399000 square units
summary(hotels_ppp)
Planar point pattern:  422 points
Average intensity 5.58414e-07 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [5939.24, 45334.18] x [25379.44, 44562.4] units
                    (39390 x 19180 units)
Window area = 755712000 square units
summary(tourism_ppp)
Planar point pattern:  107 points
Average intensity 3.947904e-13 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 2 decimal places
i.e. rounded to the nearest multiple of 0.01 units

Window: rectangle = [-13597396, 43660] x [22869, 19891558] units
                    (13640000 x 19870000 units)
Window area = 2.7103e+14 square units
summary(hawker_ppp)
Planar point pattern:  125 points
Average intensity 1.978798e-07 points per square unit

Coordinates are given to 2 decimal places
i.e. rounded to the nearest multiple of 0.01 units

Window: rectangle = [12874.19, 45241.4] x [28355.97, 47872.53] units
                    (32370 x 19520 units)
Window area = 631697000 square units
summary(bus_ppp)
Planar point pattern:  5156 points
Average intensity 4.436334e-06 points per square unit

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Window: rectangle = [4427.94, 48282.5] x [26482.1, 52983.82] units
                    (43850 x 26500 units)
Window area = 1162220000 square units

d. Handling duplicate points

listings21_ppp_jit <- rjitter(listings21_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(listings21_ppp_jit))
[1] FALSE
listings19_ppp_jit <- rjitter(listings19_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(listings19_ppp_jit))
[1] FALSE
hotels_ppp_jit <- rjitter(hotels_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(hotels_ppp_jit))
[1] FALSE
tourism_ppp_jit <- rjitter(tourism_ppp, 
                             retry=TRUE, 
                             nsim=1, 
                             drop=TRUE)

any(duplicated(tourism_ppp_jit))
[1] FALSE
any(duplicated(MRT_ppp))
[1] FALSE
any(duplicated(hawker_ppp))
[1] FALSE
any(duplicated(bus_ppp))
[1] FALSE

e. Creating owin object

covert sg spatialpolygon object into owin object of spatstat

sg_owin <- as(sg_sp, "owin")
plot(sg_owin)

f. Combining point events object and owin object

listings21SG_ppp = listings21_ppp_jit[sg_owin]
listings19SG_ppp = listings19_ppp_jit[sg_owin]
hotelsSG_ppp = hotels_ppp_jit[sg_owin]
tourismSG_ppp = tourism_ppp_jit[sg_owin]
MRTSG_ppp = MRT_ppp[sg_owin]
hawkerSG_ppp = hawker_ppp[sg_owin]
busSG_ppp = bus_ppp[sg_owin]
par(mar=c(1,1,1,1))
par(mfrow=c(4,2))
plot(listings21SG_ppp)
plot(listings19SG_ppp)
plot(hotelsSG_ppp)
plot(tourismSG_ppp)
plot(MRTSG_ppp)
plot(hawkerSG_ppp)
plot(busSG_ppp)

4. Section A: Airbnb Distribution in 2019

4.1 First-order Spatial Point Patterns Analysis: A. deriving kernel density estimation (KDE) layer for visualising and exploring the intensity of point processes

a. Computing kernel density estimation using automatic bandwidth selection method

Using sigma = bw.diggle

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
listings19SG_ppp.km <- rescale(listings19SG_ppp, 1000, "km")
#plot
kde_listings19SG_bw <- density(listings19SG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian") 

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
hotelsSG_ppp.km <- rescale(hotelsSG_ppp, 1000, "km")
#plot
kde_hotelsSG_bw <- density(hotelsSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian") 

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
tourismSG_ppp.km <- rescale(tourismSG_ppp, 1000, "km")
#plot
kde_tourismSG_bw <- density(tourismSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian") 

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
MRTSG_ppp.km <- rescale(MRTSG_ppp, 1000, "km")
#plot
kde_MRTSG_bw <- density(MRTSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian")

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
hawkerSG_ppp.km <- rescale(hawkerSG_ppp, 1000, "km")
#plot
kde_hawkerSG_bw <- density(hawkerSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian")

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
busSG_ppp.km <- rescale(busSG_ppp, 1000, "km")
#plot
kde_busSG_bw <- density(busSG_ppp.km,
                              sigma=bw.diggle,
                              edge=TRUE,
                            kernel="gaussian")

par(mar=c(1,1,1,1))
par(mfrow=c(3,2))
plot(kde_listings19SG_bw, main = "listings19")
plot(kde_hotelsSG_bw, main = "hotels")
plot(kde_tourismSG_bw, main = "tourism")
plot(kde_MRTSG_bw, main = "MRT")
plot(kde_hawkerSG_bw, main = "hawker")
plot(kde_busSG_bw, main = "bus")

RETRIEVE THE BANDWIDTH USED TO COMPUTE THE KDE LAYER

cat('bandwidth for listings19: ', bw.diggle(listings19SG_ppp.km), '\n')
cat('bandwidth for hotels: ', bw.diggle(hotelsSG_ppp.km), '\n')
cat('bandwidth for tourism: ', bw.diggle(tourismSG_ppp.km), '\n')
cat('bandwidth for MRT: ', bw.diggle(MRTSG_ppp.km), '\n')
cat('bandwidth for hawker: ', bw.diggle(hawkerSG_ppp.km), '\n')
cat('bandwidth for bus: ', bw.diggle(busSG_ppp.km), '\n')

Let’s try to look at the bandwidths returned by these two methods first

#let's see some examples
bw.diggle(listings19SG_ppp.km)
     sigma 
0.04196627 
bw.ppl(listings19SG_ppp.km)
    sigma 
0.2665422 
bw.diggle(tourismSG_ppp.km)
   sigma 
2.113734 
bw.ppl(tourismSG_ppp.km)
  sigma 
10.8985 
bw.diggle(busSG_ppp.km)
    sigma 
0.3927249 
bw.ppl(busSG_ppp.km)
    sigma 
0.1104879 

Using sigma = bw.ppl

kde_listings19SG_ppl <- density(listings19SG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian") 

kde_hotelsSG_ppl <- density(hotelsSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian") 

kde_tourismSG_ppl <- density(tourismSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian") 

kde_MRTSG_ppl <- density(MRTSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian")

kde_hawkerSG_ppl <- density(hawkerSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian")

kde_busSG_ppl <- density(busSG_ppp.km,
                              sigma=bw.ppl,
                              edge=TRUE,
                            kernel="gaussian")

plot and compare bw.diggle and bw.ppl

par(mar=c(1,1,1,1))
par(mfrow=c(6,2))
plot(kde_listings19SG_bw, main = "listings19 diggle")
plot(kde_listings19SG_ppl, main = "listings19 ppl")
plot(kde_hotelsSG_bw, main = "hotels diggle")
plot(kde_hotelsSG_ppl, main = "hotels ppl")
plot(kde_tourismSG_bw, main = "tourism diggle")
plot(kde_tourismSG_ppl, main = "tourism ppl")
plot(kde_MRTSG_bw, main = "MRT diggle")
plot(kde_MRTSG_ppl, main = "MRT ppl")
plot(kde_hawkerSG_bw, main = "hawker diggle")
plot(kde_hawkerSG_ppl, main = "hawker ppl")
plot(kde_busSG_bw, main = "bus diggle")
plot(kde_busSG_ppl, main = "bus ppl")

b. Computing kernel density estimation using fixed/adaptive bandwidth method

using fixed bandwidth

#compute a KDE layer by defining a bandwidth of 1000 meter. Notice the sigma value used is 1. This is because the unit of measurement of childcareSG_ppp.km object is in kilometer, hence the 1000m is 1km.

kde_listings19SG_bw_1 <- density(listings19SG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian") 

kde_hotelsSG_bw_1 <- density(hotelsSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian") 

kde_tourismSG_bw_1 <- density(tourismSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian") 

kde_MRTSG_bw_1 <- density(MRTSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian")

kde_hawkerSG_bw_1 <- density(hawkerSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian")

kde_busSG_bw_1 <- density(busSG_ppp.km,
                              sigma=1,
                              edge=TRUE,
                            kernel="gaussian")

using adaptive bandwidth

kde_listings19SG_adaptive <- adaptive.density(listings19SG_ppp.km, method="kernel")

kde_hotelsSG_adaptive <- adaptive.density(hotelsSG_ppp.km, method="kernel")

kde_tourismSG_adaptive <- adaptive.density(tourismSG_ppp.km, method="kernel")

kde_MRTSG_adaptive <- adaptive.density(MRTSG_ppp.km, method="kernel")

kde_hawkerSG_adaptive <- adaptive.density(hawkerSG_ppp.km, method="kernel")

kde_busSG_adaptive <- adaptive.density(busSG_ppp.km, method="kernel")

plot and compare fixed and adaptive bandwidth

par(mar=c(1,1,1,1))
par(mfrow=c(6,2))
plot(kde_listings19SG_bw_1, main = "listings19 fixed")
plot(kde_listings19SG_adaptive, main = "listings19 adaptive")
plot(kde_hotelsSG_bw_1, main = "hotels fixed")
plot(kde_hotelsSG_adaptive, main = "hotels adaptive")
plot(kde_tourismSG_bw_1, main = "tourism fixed")
plot(kde_tourismSG_adaptive, main = "tourism adaptive")
plot(kde_MRTSG_bw_1, main = "MRT fixed")
plot(kde_MRTSG_adaptive, main = "MRT adaptive")
plot(kde_hawkerSG_bw_1, main = "hawker fixed")
plot(kde_hawkerSG_adaptive, main = "hawker adaptive")
plot(kde_busSG_bw_1, main = "bus fixed")
plot(kde_busSG_adaptive, main = "bus adaptive")

c. Convert KDE output into raster and display the kernel density maps on openstreetmap of Singapore

# convert KDE output into grid
gridded_kde_listings19SG_bw <- as.SpatialGridDataFrame.im(kde_listings19SG_bw)
gridded_kde_hotelsSG_bw <- as.SpatialGridDataFrame.im(kde_hotelsSG_bw)
gridded_kde_tourismSG_bw <- as.SpatialGridDataFrame.im(kde_tourismSG_bw)
gridded_kde_MRTSG_bw <- as.SpatialGridDataFrame.im(kde_MRTSG_bw)
gridded_kde_hawkerSG_bw <- as.SpatialGridDataFrame.im(kde_hawkerSG_bw)
gridded_kde_busSG_bw <- as.SpatialGridDataFrame.im(kde_busSG_bw)

# convert gridded output into raster
kde_listings19SG_bw_raster <- raster(gridded_kde_listings19SG_bw)
kde_hotelsSG_bw_raster <- raster(gridded_kde_hotelsSG_bw)
kde_tourismSG_bw_raster <- raster(gridded_kde_tourismSG_bw)
kde_MRTSG_bw_raster <- raster(gridded_kde_MRTSG_bw)
kde_hawkerSG_bw_raster <- raster(gridded_kde_hawkerSG_bw)
kde_busSG_bw_raster <- raster(gridded_kde_busSG_bw)

# assign projection system to raster
projection(kde_listings19SG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_hotelsSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_tourismSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_MRTSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_hawkerSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_busSG_bw_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")

# visualise the raster kernel density maps on openstreetmap of Singapore
tmap_mode("view")
tmap_options(check.and.fix = TRUE)

listings19SG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_bw_raster) + 
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "listings19SG") +
  tm_basemap('OpenStreetMap')

hotelsSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_hotelsSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "hotelsSG") +
  tm_basemap('OpenStreetMap')

tourismSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_tourismSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "tourismSG") +
  tm_basemap('OpenStreetMap')

MRTSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_MRTSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "MRTSG") +
  tm_basemap('OpenStreetMap')

hawkerSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_hawkerSG_bw_raster) +
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "hawkerSG") +
  tm_basemap('OpenStreetMap')

busSG_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_busSG_bw_raster) + 
  tm_raster("v", alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "busSG") +
  tm_basemap('OpenStreetMap')

tmap_arrange(listings19SG_osmap, hotelsSG_osmap, tourismSG_osmap, MRTSG_osmap, hawkerSG_osmap, busSG_osmap, asp=2, ncol=2)
tmap_mode('plot')

4.2 Second-order Spatial Point Patterns Analysis: Cross K-Function and Cross L-Function

a. Prepare merged ppp objects by using superimpose()

listings19_vs_hotels<- superimpose('listings19'=listings19SG_ppp.km, 'hotels'=hotelsSG_ppp.km)

listings19_vs_tourism<- superimpose('listings19'=listings19SG_ppp.km, 'tourism'=tourismSG_ppp.km)

listings19_vs_MRT<- superimpose('listings19'=listings19SG_ppp.km, 'MRT'=MRTSG_ppp.km)

listings19_vs_hawker<- superimpose('listings19'=listings19SG_ppp.km, 'hawker'=hawkerSG_ppp.km)

listings19_vs_bus<- superimpose('listings19'=listings19SG_ppp.km, 'bus'=busSG_ppp.km)

b. Second-order Multi-tpye Point Patterns Analysis: Cross K-Function

Note:

For each comparison between the 2019 Airbnb listings and one of the factors, I’m going to plot the Kcross and perform a Complete Spatial Randomness(CSR) testing on the Cross K-Function.

The hypothesis are as follows: H0 = The distribution of 2019 Airbnb listings and the factor are spatially independent. H1 = The distribution of 2019 Airbnb listings and the factor are NOT at spatially independent. The null hypothesis will be rejected if p-value is smaller than alpha value of 0.01 (i.e. at 99% confident interval).

i. listings19 vs hotels

listings19_vs_hotels_Kcross <- Kcross(listings19_vs_hotels, 
                           i="listings19", j="hotels",
                           correction='border')
plot(listings19_vs_hotels_Kcross)

listings19_vs_hotels_Kcross.csr <- envelope(listings19_vs_hotels, Kcross, i="listings19", j="hotels", correction='border', nsim=99)

plot(listings19_vs_hotels_Kcross.csr, xlab="distance(km)", xlim=c(0,10))

* The plot above reveals that the are signs that the distribution of childcare centres operate by NT and ST are not independent spatially. Unfortunately, we failed to reject the null hypothesis because the empirical k-cross line is within the envelop of the 99.9% confident interval……………………………………….

ii. listings19 vs tourism

listings19_vs_tourism_Kcross <- Kcross(listings19_vs_tourism, 
                           i="listings19", j="tourism",
                           correction='border')
plot(listings19_vs_tourism_Kcross)

listings19_vs_tourism_Kcross.csr <- envelope(listings19_vs_tourism, Kcross, i="listings19", j="tourism", correction='border', nsim=99)

plot(listings19_vs_tourism_Kcross.csr, xlab="distance(km)", xlim=c(0,10))

iii. listings19 vs MRT

listings19_vs_MRT_Kcross <- Kcross(listings19_vs_MRT, 
                           i="listings19", j="MRT",
                           correction='border')
plot(listings19_vs_MRT_Kcross)

listings19_vs_MRT_Kcross.csr <- envelope(listings19_vs_MRT, Kcross, i="listings19", j="MRT", correction='border', nsim=99)

plot(listings19_vs_MRT_Kcross.csr, xlab="distance(m)", xlim=c(0,10))

iv. listings19 vs hawker

listings19_vs_hawker_Kcross <- Kcross(listings19_vs_hawker, 
                           i="listings19", j="hawker",
                           correction='border')
plot(listings19_vs_hawker_Kcross)

listings19_vs_hawker_Kcross.csr <- envelope(listings19_vs_hawker, Kcross, i="listings19", j="hawker", correction='border', nsim=99)

plot(listings19_vs_hawker_Kcross.csr, xlab="distance(m)", xlim=c(0,10))

v. listings19 vs bus

listings19_vs_bus_Kcross <- Kcross(listings19_vs_bus, 
                           i="listings19", j="bus",
                           correction='border')
plot(listings19_vs_bus_Kcross)

listings19_vs_bus_Kcross.csr <- envelope(listings19_vs_bus, Kcross, i="listings19", j="bus", correction='border', nsim=99)

plot(listings19_vs_bus_Kcross.csr, xlab="distance(m)", xlim=c(0,10))

c. Second-order Multi-tpye Point Patterns Analysis: Cross L-Function

Note:

Cross L-Function and Cross K-Function are the actually the same but just Cross L-Function is the standardized version of Cross K-Function. Their CSR testing hypothesis are also the same.

i. listings19 vs hotels

listings19_vs_hotels_Lcross <- Lcross(listings19_vs_hotels, 
                           i="listings19", j="hotels",
                           correction='border')
plot(listings19_vs_hotels_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_hotels_Lcross.csr <- envelope(listings19_vs_hotels, Lcross, i="listings19", j="hotels", correction='border', nsim=99)

plot(listings19_vs_hotels_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

ii. listings19 vs tourism

listings19_vs_tourism_Lcross <- Lcross(listings19_vs_tourism, 
                           i="listings19", j="tourism",
                           correction='border')
plot(listings19_vs_tourism_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_tourism_Lcross.csr <- envelope(listings19_vs_tourism, Lcross, i="listings19", j="tourism", correction='border', nsim=99)

plot(listings19_vs_tourism_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

iii. listings19 vs MRT

listings19_vs_MRT_Lcross <- Lcross(listings19_vs_MRT, 
                           i="listings19", j="MRT",
                           correction='border')
plot(listings19_vs_MRT_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_MRT_Lcross.csr <- envelope(listings19_vs_MRT, Lcross, i="listings19", j="MRT", correction='border', nsim=99)

plot(listings19_vs_MRT_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

iv. listings19 vs hawker

listings19_vs_hawker_Lcross <- Lcross(listings19_vs_hawker, 
                           i="listings19", j="hawker",
                           correction='border')
plot(listings19_vs_hawker_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_hawker_Lcross.csr <- envelope(listings19_vs_hawker, Lcross, i="listings19", j="hawker", correction='border', nsim=99)

plot(listings19_vs_hawker_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

v. listings19 vs bus

listings19_vs_bus_Lcross <- Lcross(listings19_vs_bus, 
                           i="listings19", j="bus",
                           correction='border')
plot(listings19_vs_bus_Lcross, . -r ~ r, 
     xlab = "distance(km)", 
     xlim=c(0, 10))

listings19_vs_bus_Lcross.csr <- envelope(listings19_vs_bus, Lcross, i="listings19", j="bus", correction='border', nsim=99)

plot(listings19_vs_bus_Lcross.csr, . -r ~ r, xlab="distance(km)", xlim=c(0,10))

d. Summary: Cross K-Function and Cross L-Function

5. Section B: Impact of COVID-19

5.1 First-order Spatial Point Patterns Analysis: A. Derive kernel density maps of all Airbnb listings and Airbnb by room type as at June 2019 and June 2021

a. EDA and Geospatial data wrangling

check 2019 and 2020 room types

unique(listings19_sf$"room_type")
[1] "Private room"    "Entire home/apt" "Shared room"    
unique(listings21_sf$"room_type")
[1] "Private room"    "Entire home/apt" "Shared room"    
[4] "Hotel room"     

filter important field and convert to SpatialPointsDataFrame

listings19_sf <-listings19_sf %>%
  select(room_type)
listings21_sf <-listings21_sf %>%
  select(room_type)

listings19 <- as_Spatial(listings19_sf)
listings21 <- as_Spatial(listings21_sf)

change marked field to factor data type

listings19@data$"room_type" <- as.factor(listings19@data$"room_type")
listings21@data$"room_type" <- as.factor(listings21@data$"room_type")

peek the distribution

tmap_mode("plot")
tm_shape(sg) +
  tm_borders(alpha = 0.5) +
tm_shape(listings19) +
  tm_dots(col = 'room_type', 
          size = 0.5) +
  tm_layout(title = "2019 room_type") +
tm_facets(by="room_type")
tm_shape(sg) +
  tm_borders(alpha = 0.5) +
tm_shape(listings21) +
  tm_dots(col = 'room_type', 
          size = 0.5) +
  tm_layout(title = "2021 room_type") +
tm_facets(by="room_type")

Convert the SpatialPointsDataFrame into ppp (point pattern in spatstat) format

listings19_ppp <- as(listings19, "ppp")
listings21_ppp <- as(listings21, "ppp")

plot(listings19_ppp)
plot(listings21_ppp)

summary(listings19_ppp)
Marked planar point pattern:  8293 points
Average intensity 9.345289e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Multitype:
                frequency proportion    intensity
Entire home/apt      4264 0.51416860 4.805054e-06
Private room         3582 0.43193050 4.036516e-06
Shared room           447 0.05390088 5.037193e-07

Window: rectangle = [7215.57, 44098.31] x [25166.35, 49226.35] units
                    (36880 x 24060 units)
Window area = 887399000 square units
summary(listings21_ppp)
Marked planar point pattern:  4238 points
Average intensity 5.114513e-06 points per square unit

*Pattern contains duplicated points*

Coordinates are given to 3 decimal places
i.e. rounded to the nearest multiple of 0.001 units

Multitype:
                frequency proportion    intensity
Entire home/apt      1817 0.42874000 2.192796e-06
Hotel room            198 0.04672015 2.389508e-07
Private room         2050 0.48371870 2.473986e-06
Shared room           173 0.04082114 2.087803e-07

Window: rectangle = [7406.99, 43337.89] x [25330, 48391.55] units
                    (35930 x 23060 units)
Window area = 828622000 square units

Avoiding duplicated spatial point event by using jittering method

listings19_ppp_jit <- rjitter(listings19_ppp, retry=TRUE, nsim=1, drop=TRUE)
listings21_ppp_jit <- rjitter(listings21_ppp, retry=TRUE, nsim=1, drop=TRUE)
any(duplicated(listings19_ppp_jit))
[1] FALSE
any(duplicated(listings21_ppp_jit))
[1] FALSE

COMBINING ppp AND owin object

listings19SG_ppp = listings19_ppp_jit[sg_owin]
listings21SG_ppp = listings21_ppp_jit[sg_owin]

plot(listings19SG_ppp)
plot(listings21SG_ppp)

b. Compute kernel density estimation using fixed bandwidth

# Rescale KDE values: covert the unit of measurement from meter to kilometer.
listings19SG_ppp.km <- rescale(listings19SG_ppp, 1000, "km")
#plot
kde_listings19SG_km_split <-density(split(listings19SG_ppp.km),
                              sigma=0.18,
                              edge=TRUE,
                            kernel="gaussian")
plot(kde_listings19SG_km_split)
# Rescale KDE values: covert the unit of measurement from meter to kilometer.
listings21SG_ppp.km <- rescale(listings21SG_ppp, 1000, "km")
#plot
kde_listings21SG_km_split <-density(split(listings21SG_ppp.km),
                              sigma=0.18,
                              edge=TRUE,
                            kernel="gaussian")
plot(kde_listings21SG_km_split)

intensity(listings19SG_ppp.km)
Entire home/apt    Private room     Shared room 
       5.693556        4.784029        0.597002 
intensity(listings21SG_ppp.km)
Entire home/apt      Hotel room    Private room     Shared room 
      2.4254041       0.2644438       2.7365931       0.2310545 

c. Convert KDE output into raster and display the kernel density maps on openstreetmap of Singapore

#split
kde_listings19SG_EH<-kde_listings19SG_km_split[[1]]
kde_listings19SG_PR<-kde_listings19SG_km_split[[2]]
kde_listings19SG_SR<-kde_listings19SG_km_split[[3]]

kde_listings21SG_EH<-kde_listings21SG_km_split[[1]]
kde_listings21SG_HR<-kde_listings21SG_km_split[[2]]
kde_listings21SG_PR<-kde_listings21SG_km_split[[3]]
kde_listings21SG_SR<-kde_listings21SG_km_split[[4]]

# convert KDE output into grid
gridded_kde_listings19SG_EH <- as.SpatialGridDataFrame.im(kde_listings19SG_EH)
gridded_kde_listings19SG_PR <- as.SpatialGridDataFrame.im(kde_listings19SG_PR)
gridded_kde_listings19SG_SR <- as.SpatialGridDataFrame.im(kde_listings19SG_SR)

gridded_kde_listings21SG_EH <- as.SpatialGridDataFrame.im(kde_listings21SG_EH)
gridded_kde_listings21SG_HR <- as.SpatialGridDataFrame.im(kde_listings21SG_HR)
gridded_kde_listings21SG_PR <- as.SpatialGridDataFrame.im(kde_listings21SG_PR)
gridded_kde_listings21SG_SR <- as.SpatialGridDataFrame.im(kde_listings21SG_SR)

# convert gridded output into raster
kde_listings19SG_EH_raster <- raster(gridded_kde_listings19SG_EH)
kde_listings19SG_PR_raster <- raster(gridded_kde_listings19SG_PR)
kde_listings19SG_SR_raster <- raster(gridded_kde_listings19SG_SR)

kde_listings21SG_EH_raster <- raster(gridded_kde_listings21SG_EH)
kde_listings21SG_HR_raster <- raster(gridded_kde_listings21SG_HR)
kde_listings21SG_PR_raster <- raster(gridded_kde_listings21SG_PR)
kde_listings21SG_SR_raster <- raster(gridded_kde_listings21SG_SR)

# assign projection system to raster
projection(kde_listings19SG_EH_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings19SG_PR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings19SG_SR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")

projection(kde_listings21SG_EH_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings21SG_HR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings21SG_PR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")
projection(kde_listings21SG_SR_raster) <- CRS("+init=EPSG:3414 +datum=WGS84 +units=km")

# visualise the raster kernel density maps on openstreetmap of Singapore
tmap_mode("view")
tmap_options(check.and.fix = TRUE)

listings19SG_EH_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_EH_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000), alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2019 Entire home/apt KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_EH_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_EH_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Entire home/apt KDE") + 
  tm_basemap('OpenStreetMap')

listings19SG_PR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_PR_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2019 Private Room KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_PR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_PR_raster) + 
  tm_raster("v",breaks=c(0, 50, 100, 150, 200, 250, 300, 350, 400, 1000),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Private Room KDE") + 
  tm_basemap('OpenStreetMap')

listings19SG_SR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings19SG_SR_raster) + 
  tm_raster("v",breaks=c(0, 10, 20, 30, 40, 50, 60, 70, 80, 100),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2019 Shared Room KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_SR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_SR_raster) + 
  tm_raster("v",breaks=c(0, 10, 20, 30, 40, 50, 60, 70, 80, 100),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Shared Room KDE") + 
  tm_basemap('OpenStreetMap')

listings21SG_HR_osmap <- tm_shape(sg_sf) +
  tm_borders(col = 'black',
             lwd = 1,
             alpha = 0.5) +
  tm_shape(kde_listings21SG_HR_raster) + 
  tm_raster("v",breaks=c(0, 10, 20, 30, 40, 50, 60, 70, 80, 100),alpha = 0.7) +
  tm_layout(legend.outside = TRUE, frame = FALSE, title = "2021 Hotel Room KDE") + 
  tm_basemap('OpenStreetMap')

tmap_arrange(listings19SG_EH_osmap,listings21SG_EH_osmap, listings19SG_PR_osmap, listings21SG_PR_osmap, listings19SG_SR_osmap, listings21SG_SR_osmap, listings21SG_HR_osmap, asp=2, ncol=2)
tmap_mode('plot')

5.2 Second-order Spatial Point Patterns Analysis: G-Function and F-Function

Note

For each function used, I will also perform a Complete Spatial Randomness(CSR) testing on it. The hypothesis are the same for all functions:

The hypothesis are as follows: H0 = The distribution of that type of Airbnb are randomly distributed in Singapore. H1= The distribution of that type of Airbnb are not randomly distributed in Singapore. The null hypothesis will be rejected if p-value is smaller than alpha value of 0.01 (i.e. at 99% confident interval).

a. Prepare split ppp objects

listings19SG_EH <- split(listings19SG_ppp.km)[[1]]
listings19SG_PR <- split(listings19SG_ppp.km)[[2]]
listings19SG_SR <- split(listings19SG_ppp.km)[[3]]

listings21SG_EH <- split(listings21SG_ppp.km)[[1]]
listings21SG_PR <- split(listings21SG_ppp.km)[[3]]
listings21SG_SR <- split(listings21SG_ppp.km)[[4]]

b. Analysing Spatial Point Process Using G-Function

Entire Home

# 2019
G_19_EH = Gest(listings19SG_EH, correction = "border")
plot(G_19_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_19_EH.csr <- envelope(listings19SG_EH, Gest, nsim = 99)
plot(G_19_EH.csr, xlim=c(0,1))

# 2021
G_21_EH = Gest(listings21SG_EH, correction = "border")
plot(G_21_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_21_EH.csr <- envelope(listings21SG_EH, Gest, nsim = 99)
plot(G_21_EH.csr, xlim=c(0,1))

Private Room

# 2019
G_19_PR = Gest(listings19SG_PR, correction = "border")
plot(G_19_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_19_PR.csr <- envelope(listings19SG_PR, Gest, nsim = 99)
plot(G_19_PR.csr, xlim=c(0,1))

# 2021
G_21_PR = Gest(listings21SG_PR, correction = "border")
plot(G_21_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH G-FUCNTION
G_21_PR.csr <- envelope(listings21SG_PR, Gest, nsim = 99)
plot(G_21_PR.csr, xlim=c(0,1))

Shared Room

# 2019
G_19_SR = Gest(listings19SG_SR, correction = "border")
plot(G_19_SR, xlim=c(0,2))

#MONTE CARLO TEST WITH G-FUCNTION
G_19_SR.csr <- envelope(listings19SG_SR, Gest, nsim = 99)
plot(G_19_SR.csr, xlim=c(0,2))

# 2021
G_21_SR = Gest(listings21SG_SR, correction = "border")
plot(G_21_SR, xlim=c(0,2))

#MONTE CARLO TEST WITH G-FUCNTION
G_21_SR.csr <- envelope(listings21SG_SR, Gest, nsim = 99)
plot(G_21_SR.csr, xlim=c(0,2))

c. Analysing Spatial Point Process Using F-Function

Entire Home

#2019
F_19_EH = Fest(listings19SG_EH)
plot(F_19_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_19_EH.csr <- envelope(listings19SG_EH, Fest, nsim = 99)
plot(F_19_EH.csr, xlim=c(0,1))

#2021
F_21_EH = Fest(listings21SG_EH)
plot(F_21_EH, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_21_EH.csr <- envelope(listings21SG_EH, Fest, nsim = 99)
plot(F_21_EH.csr, xlim=c(0,1))

Private Room

#2019
F_19_PR = Fest(listings19SG_PR)
plot(F_19_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_19_PR.csr <- envelope(listings19SG_PR, Fest, nsim = 99)
plot(F_19_PR.csr, xlim=c(0,1))

#2021
F_21_PR = Fest(listings21SG_PR)
plot(F_21_PR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_21_PR.csr <- envelope(listings21SG_PR, Fest, nsim = 99)
plot(F_21_PR.csr, xlim=c(0,1))

Shared Room

#2019
F_19_SR = Fest(listings19SG_SR)
plot(F_19_SR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_19_SR.csr <- envelope(listings19SG_SR, Fest, nsim = 99)
plot(F_19_SR.csr, xlim=c(0,1))

#2021
F_21_SR = Fest(listings21SG_SR)
plot(F_21_SR, xlim=c(0,1))

#MONTE CARLO TEST WITH F-FUCNTION
F_21_SR.csr <- envelope(listings21SG_SR, Fest, nsim = 99)
plot(F_21_SR.csr, xlim=c(0,1))

d. Summary: G-Function and K-Function